{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Recursively split JSON"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "from langchain_text_splitters import RecursiveJsonSplitter\n",
    "import requests\n",
    "\n",
    "# This is a large nested json object and will be loaded as a python dict\n",
    "json_data = requests.get(\"https://api.smith.langchain.com/openapi.json\").json()\n",
    "splitter = RecursiveJsonSplitter(max_chunk_size=300)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "page_content='{\"openapi\": \"3.1.0\", \"info\": {\"title\": \"LangSmith\", \"version\": \"0.1.0\"}, \"paths\": {\"/api/v1/sessions/{session_id}\": {\"get\": {\"tags\": [\"tracer-sessions\"], \"summary\": \"Read Tracer Session\", \"description\": \"Get a specific session.\"}}}}'\n",
      "page_content='{\"paths\": {\"/api/v1/sessions/{session_id}\": {\"get\": {\"operationId\": \"read_tracer_session_api_v1_sessions__session_id__get\", \"security\": [{\"API Key\": []}, {\"Tenant ID\": []}, {\"Bearer Auth\": []}]}}}}'\n"
     ]
    }
   ],
   "source": [
    "# The splitter can also output documents\n",
    "docs = splitter.create_documents(texts=[json_data])\n",
    "print(docs[0])\n",
    "print(docs[1])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{\"openapi\": \"3.1.0\", \"info\": {\"title\": \"LangSmith\", \"version\": \"0.1.0\"}, \"paths\": {\"/api/v1/sessions/{session_id}\": {\"get\": {\"tags\": [\"tracer-sessions\"], \"summary\": \"Read Tracer Session\", \"description\": \"Get a specific session.\"}}}}\n",
      "{\"paths\": {\"/api/v1/sessions/{session_id}\": {\"get\": {\"operationId\": \"read_tracer_session_api_v1_sessions__session_id__get\", \"security\": [{\"API Key\": []}, {\"Tenant ID\": []}, {\"Bearer Auth\": []}]}}}}\n"
     ]
    }
   ],
   "source": [
    "texts = splitter.split_text(json_data=json_data)\n",
    "print(texts[0])\n",
    "print(texts[1])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'openapi': '3.1.0', 'info': {'title': 'LangSmith', 'version': '0.1.0'}, 'paths': {'/api/v1/sessions/{session_id}': {'get': {'tags': ['tracer-sessions'], 'summary': 'Read Tracer Session', 'description': 'Get a specific session.'}}}}\n",
      "{'paths': {'/api/v1/sessions/{session_id}': {'get': {'operationId': 'read_tracer_session_api_v1_sessions__session_id__get', 'security': [{'API Key': []}, {'Tenant ID': []}, {'Bearer Auth': []}]}}}}\n"
     ]
    }
   ],
   "source": [
    "# Recursively split json data - If you need to access/manipulate the smaller json chunks\n",
    "json_chunks = splitter.split_json(json_data=json_data)\n",
    "print(json_chunks[0])\n",
    "print(json_chunks[1])"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "langchain0_1",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
